#!pip install plotly
# !pip install sort-dataframeby-monthorweek
# !pip install sorted_months_weekdays
# !pip install cufflinks
!pip install chart_studio
Collecting chart_studio
Downloading chart_studio-1.1.0-py3-none-any.whl (64 kB)
---------------------------------------- 0.0/64.4 kB ? eta -:--:--
------------------- -------------------- 30.7/64.4 kB ? eta -:--:--
------------------- -------------------- 30.7/64.4 kB ? eta -:--:--
-------------------------------------- 64.4/64.4 kB 497.0 kB/s eta 0:00:00
Collecting retrying>=1.3.3
Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Requirement already satisfied: plotly in c:\users\hp\anaconda3\lib\site-packages (from chart_studio) (5.14.1)
Requirement already satisfied: requests in c:\users\hp\anaconda3\lib\site-packages (from chart_studio) (2.29.0)
Requirement already satisfied: six in c:\users\hp\anaconda3\lib\site-packages (from chart_studio) (1.16.0)
Requirement already satisfied: packaging in c:\users\hp\anaconda3\lib\site-packages (from plotly->chart_studio) (23.0)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\hp\anaconda3\lib\site-packages (from plotly->chart_studio) (8.2.2)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\hp\anaconda3\lib\site-packages (from requests->chart_studio) (2023.5.7)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\hp\anaconda3\lib\site-packages (from requests->chart_studio) (1.26.15)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\hp\anaconda3\lib\site-packages (from requests->chart_studio) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\hp\anaconda3\lib\site-packages (from requests->chart_studio) (3.4)
Installing collected packages: retrying, chart_studio
Successfully installed chart_studio-1.1.0 retrying-1.3.4
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import sort_dataframeby_monthorweek as sm
import warnings
import cufflinks as cf
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.io as pio
from plotly.offline import init_notebook_mode
warnings.filterwarnings('ignore')
pio.renderers.default = "notebook_connected"
init_notebook_mode(connected=False)
cf.go_offline()
df = pd.read_csv('hotel_bookings.csv')
df.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | NaN | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | NaN | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 2015-07-03 |
5 rows × 32 columns
df.shape
(119390, 32)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 119390 entries, 0 to 119389 Data columns (total 32 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 hotel 119390 non-null object 1 is_canceled 119390 non-null int64 2 lead_time 119390 non-null int64 3 arrival_date_year 119390 non-null int64 4 arrival_date_month 119390 non-null object 5 arrival_date_week_number 119390 non-null int64 6 arrival_date_day_of_month 119390 non-null int64 7 stays_in_weekend_nights 119390 non-null int64 8 stays_in_week_nights 119390 non-null int64 9 adults 119390 non-null int64 10 children 119386 non-null float64 11 babies 119390 non-null int64 12 meal 119390 non-null object 13 country 118902 non-null object 14 market_segment 119390 non-null object 15 distribution_channel 119390 non-null object 16 is_repeated_guest 119390 non-null int64 17 previous_cancellations 119390 non-null int64 18 previous_bookings_not_canceled 119390 non-null int64 19 reserved_room_type 119390 non-null object 20 assigned_room_type 119390 non-null object 21 booking_changes 119390 non-null int64 22 deposit_type 119390 non-null object 23 agent 103050 non-null float64 24 company 6797 non-null float64 25 days_in_waiting_list 119390 non-null int64 26 customer_type 119390 non-null object 27 adr 119390 non-null float64 28 required_car_parking_spaces 119390 non-null int64 29 total_of_special_requests 119390 non-null int64 30 reservation_status 119390 non-null object 31 reservation_status_date 119390 non-null object dtypes: float64(4), int64(16), object(12) memory usage: 29.1+ MB
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
# Dropping columns having high number of missing values
df = df.drop(columns = ['agent','company'],axis=1)
# country has got 488 rows with null values, so we just replace these null values with Unknown
df = df.fillna({'country':'Unknown'})
# Coverting reservation_status_date into a datetime data type
df['reservation_status_date'] = pd.to_datetime(df['reservation_status_date'])
df.describe(include='object')
| hotel | arrival_date_month | meal | country | market_segment | distribution_channel | reserved_room_type | assigned_room_type | deposit_type | customer_type | reservation_status | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 |
| unique | 2 | 12 | 5 | 178 | 8 | 5 | 10 | 12 | 3 | 4 | 3 |
| top | City Hotel | August | BB | PRT | Online TA | TA/TO | A | A | No Deposit | Transient | Check-Out |
| freq | 79330 | 13877 | 92310 | 48590 | 56477 | 97870 | 85994 | 74053 | 104641 | 89613 | 75166 |
for col in df.describe(include='object'):
print(col)
print(df[col].unique())
print('-'*60)
hotel ['Resort Hotel' 'City Hotel'] ------------------------------------------------------------ arrival_date_month ['July' 'August' 'September' 'October' 'November' 'December' 'January' 'February' 'March' 'April' 'May' 'June'] ------------------------------------------------------------ meal ['BB' 'FB' 'HB' 'SC' 'Undefined'] ------------------------------------------------------------ country ['PRT' 'GBR' 'USA' 'ESP' 'IRL' 'FRA' 'Unknown' 'ROU' 'NOR' 'OMN' 'ARG' 'POL' 'DEU' 'BEL' 'CHE' 'CN' 'GRC' 'ITA' 'NLD' 'DNK' 'RUS' 'SWE' 'AUS' 'EST' 'CZE' 'BRA' 'FIN' 'MOZ' 'BWA' 'LUX' 'SVN' 'ALB' 'IND' 'CHN' 'MEX' 'MAR' 'UKR' 'SMR' 'LVA' 'PRI' 'SRB' 'CHL' 'AUT' 'BLR' 'LTU' 'TUR' 'ZAF' 'AGO' 'ISR' 'CYM' 'ZMB' 'CPV' 'ZWE' 'DZA' 'KOR' 'CRI' 'HUN' 'ARE' 'TUN' 'JAM' 'HRV' 'HKG' 'IRN' 'GEO' 'AND' 'GIB' 'URY' 'JEY' 'CAF' 'CYP' 'COL' 'GGY' 'KWT' 'NGA' 'MDV' 'VEN' 'SVK' 'FJI' 'KAZ' 'PAK' 'IDN' 'LBN' 'PHL' 'SEN' 'SYC' 'AZE' 'BHR' 'NZL' 'THA' 'DOM' 'MKD' 'MYS' 'ARM' 'JPN' 'LKA' 'CUB' 'CMR' 'BIH' 'MUS' 'COM' 'SUR' 'UGA' 'BGR' 'CIV' 'JOR' 'SYR' 'SGP' 'BDI' 'SAU' 'VNM' 'PLW' 'QAT' 'EGY' 'PER' 'MLT' 'MWI' 'ECU' 'MDG' 'ISL' 'UZB' 'NPL' 'BHS' 'MAC' 'TGO' 'TWN' 'DJI' 'STP' 'KNA' 'ETH' 'IRQ' 'HND' 'RWA' 'KHM' 'MCO' 'BGD' 'IMN' 'TJK' 'NIC' 'BEN' 'VGB' 'TZA' 'GAB' 'GHA' 'TMP' 'GLP' 'KEN' 'LIE' 'GNB' 'MNE' 'UMI' 'MYT' 'FRO' 'MMR' 'PAN' 'BFA' 'LBY' 'MLI' 'NAM' 'BOL' 'PRY' 'BRB' 'ABW' 'AIA' 'SLV' 'DMA' 'PYF' 'GUY' 'LCA' 'ATA' 'GTM' 'ASM' 'MRT' 'NCL' 'KIR' 'SDN' 'ATF' 'SLE' 'LAO'] ------------------------------------------------------------ market_segment ['Direct' 'Corporate' 'Online TA' 'Offline TA/TO' 'Complementary' 'Groups' 'Undefined' 'Aviation'] ------------------------------------------------------------ distribution_channel ['Direct' 'Corporate' 'TA/TO' 'Undefined' 'GDS'] ------------------------------------------------------------ reserved_room_type ['C' 'A' 'D' 'E' 'G' 'F' 'H' 'L' 'P' 'B'] ------------------------------------------------------------ assigned_room_type ['C' 'A' 'D' 'E' 'G' 'F' 'I' 'B' 'H' 'P' 'L' 'K'] ------------------------------------------------------------ deposit_type ['No Deposit' 'Refundable' 'Non Refund'] ------------------------------------------------------------ customer_type ['Transient' 'Contract' 'Transient-Party' 'Group'] ------------------------------------------------------------ reservation_status ['Check-Out' 'Canceled' 'No-Show'] ------------------------------------------------------------
df.describe()
| is_canceled | lead_time | arrival_date_year | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | booking_changes | days_in_waiting_list | adr | required_car_parking_spaces | total_of_special_requests | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119386.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 |
| mean | 0.370416 | 104.011416 | 2016.156554 | 27.165173 | 15.798241 | 0.927599 | 2.500302 | 1.856403 | 0.103890 | 0.007949 | 0.031912 | 0.087118 | 0.137097 | 0.221124 | 2.321149 | 101.831122 | 0.062518 | 0.571363 |
| std | 0.482918 | 106.863097 | 0.707476 | 13.605138 | 8.780829 | 0.998613 | 1.908286 | 0.579261 | 0.398561 | 0.097436 | 0.175767 | 0.844336 | 1.497437 | 0.652306 | 17.594721 | 50.535790 | 0.245291 | 0.792798 |
| min | 0.000000 | 0.000000 | 2015.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -6.380000 | 0.000000 | 0.000000 |
| 25% | 0.000000 | 18.000000 | 2016.000000 | 16.000000 | 8.000000 | 0.000000 | 1.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 69.290000 | 0.000000 | 0.000000 |
| 50% | 0.000000 | 69.000000 | 2016.000000 | 28.000000 | 16.000000 | 1.000000 | 2.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 94.575000 | 0.000000 | 0.000000 |
| 75% | 1.000000 | 160.000000 | 2017.000000 | 38.000000 | 23.000000 | 2.000000 | 3.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 126.000000 | 0.000000 | 1.000000 |
| max | 1.000000 | 737.000000 | 2017.000000 | 53.000000 | 31.000000 | 19.000000 | 50.000000 | 55.000000 | 10.000000 | 10.000000 | 1.000000 | 26.000000 | 72.000000 | 21.000000 | 391.000000 | 5400.000000 | 8.000000 | 5.000000 |
df['adr'].plot(kind='box')
<Axes: >
df = df[df['adr']<5000]
df['is_canceled'].value_counts(normalize=True)
0 0.629589 1 0.370411 Name: is_canceled, dtype: float64
df['is_canceled'].value_counts()
0 75166 1 44223 Name: is_canceled, dtype: int64
df['country'].value_counts(normalize=True)
PRT 0.406981
GBR 0.101592
FRA 0.087236
ESP 0.071765
DEU 0.061036
...
DJI 0.000008
BWA 0.000008
HND 0.000008
VGB 0.000008
NAM 0.000008
Name: country, Length: 178, dtype: float64
df['arrival_date_month'].value_counts(normalize=True)*100
August 11.623349 July 10.604830 May 9.876119 October 9.347595 April 9.288125 June 9.162486 September 8.801481 March 8.202598 February 6.757742 November 5.690642 December 5.678915 January 4.966119 Name: arrival_date_month, dtype: float64
df['market_segment'].value_counts(normalize=True)*100
Online TA 47.305028 Offline TA/TO 20.284951 Groups 16.593656 Direct 10.558762 Corporate 4.435082 Complementary 0.622335 Aviation 0.198511 Undefined 0.001675 Name: market_segment, dtype: float64
df['customer_type'].value_counts(normalize=True)
Transient 0.750588 Transient-Party 0.210438 Contract 0.034140 Group 0.004833 Name: customer_type, dtype: float64
df['reservation_status'].value_counts(normalize=True)
Check-Out 0.629589 Canceled 0.360301 No-Show 0.010110 Name: reservation_status, dtype: float64
fig = px.bar(df,x=['Not canceled','canceled'],y=df['is_canceled'].value_counts(normalize=True)*100,template='plotly_dark',title='Bookings Status Count')
fig.update_layout(xaxis_title='Bookings Status',yaxis_title='Percentage of bookings')
fig.show()
fig1 = px.histogram(df,x='hotel',color='is_canceled',barmode='group',template='plotly_dark')
fig1.update_layout(
title="Cancellation rates in Resort and City Hotels",
title_font=dict(size=20),
xaxis=dict(title="Hotel"),
yaxis=dict(title="count")
)
fig1.show()
df['month'] = df['reservation_status_date'].dt.month
month_mapping = {
1: 'January',
2: 'February',
3: 'March',
4: 'April',
5: 'May',
6: 'June',
7: 'July',
8: 'August',
9: 'September',
10: 'October',
11: 'November',
12: 'December'
}
df['month'] = df['month'].map(month_mapping)
df = sm.Sort_Dataframeby_Month(df,'month')
room_prices_monthly = df[df['is_canceled']==0]
months = ["January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"]
room_prices_monthly['month'] = pd.Categorical(room_prices_monthly['month'],categories=months,ordered=True)
resort_data = room_prices_monthly[room_prices_monthly['hotel']=='Resort Hotel']
resort_data = pd.DataFrame(resort_data.groupby(['month'])['adr'].mean().reset_index())
city_data = room_prices_monthly[room_prices_monthly['hotel']=='City Hotel']
city_data = pd.DataFrame(city_data.groupby(['month'])['adr'].mean().reset_index())
final_data = resort_data.merge(city_data,on='month')
final_data.columns = ['month','resort_hotel_prices','city_hotel_prices']
plt.figure(figsize=(10,5))
fig = px.line(final_data,x='month',y=['resort_hotel_prices','city_hotel_prices'],title = 'Room price per night over the Months', template = 'plotly_dark')
fig.update_layout(xaxis=dict(
tickangle=45),
yaxis_title = 'Prices')
fig.show()
<Figure size 1000x500 with 0 Axes>
data_guests = df[df['is_canceled']==0]
data_guests = data_guests[['month','hotel']]
data_resort = data_guests[data_guests['hotel']=='Resort Hotel']
resort_guests = data_resort['month'].value_counts().reset_index()
resort_guests.columns=['month','no_of_guests']
resort_guests
| month | no_of_guests | |
|---|---|---|
| 0 | August | 3268 |
| 1 | July | 2977 |
| 2 | October | 2647 |
| 3 | May | 2546 |
| 4 | April | 2529 |
| 5 | March | 2434 |
| 6 | February | 2240 |
| 7 | September | 2238 |
| 8 | November | 2157 |
| 9 | January | 2090 |
| 10 | June | 2032 |
| 11 | December | 1780 |
data_city = data_guests[data_guests['hotel']=='City Hotel']
city_guests = data_city['month'].value_counts().reset_index()
city_guests.columns=['month','no_of_guests']
city_guests
| month | no_of_guests | |
|---|---|---|
| 0 | August | 5293 |
| 1 | July | 4760 |
| 2 | May | 4620 |
| 3 | September | 4489 |
| 4 | October | 4351 |
| 5 | June | 4334 |
| 6 | April | 4000 |
| 7 | March | 3976 |
| 8 | February | 2900 |
| 9 | November | 2882 |
| 10 | January | 2530 |
| 11 | December | 2093 |
final_guests = resort_guests.merge(city_guests,on='month')
final_guests.columns=['month','no_of_guests_in_resort','no_of_guests_in_city_hotel']
final_guests
| month | no_of_guests_in_resort | no_of_guests_in_city_hotel | |
|---|---|---|---|
| 0 | August | 3268 | 5293 |
| 1 | July | 2977 | 4760 |
| 2 | October | 2647 | 4351 |
| 3 | May | 2546 | 4620 |
| 4 | April | 2529 | 4000 |
| 5 | March | 2434 | 3976 |
| 6 | February | 2240 | 2900 |
| 7 | September | 2238 | 4489 |
| 8 | November | 2157 | 2882 |
| 9 | January | 2090 | 2530 |
| 10 | June | 2032 | 4334 |
| 11 | December | 1780 | 2093 |
final_guests = sm.Sort_Dataframeby_Month(final_guests,'month')
final_guests
| month | no_of_guests_in_resort | no_of_guests_in_city_hotel | |
|---|---|---|---|
| 0 | January | 2090 | 2530 |
| 1 | February | 2240 | 2900 |
| 2 | March | 2434 | 3976 |
| 3 | April | 2529 | 4000 |
| 4 | May | 2546 | 4620 |
| 5 | June | 2032 | 4334 |
| 6 | July | 2977 | 4760 |
| 7 | August | 3268 | 5293 |
| 8 | September | 2238 | 4489 |
| 9 | October | 2647 | 4351 |
| 10 | November | 2157 | 2882 |
| 11 | December | 1780 | 2093 |
px.line(final_guests, x = 'month', y = ['no_of_guests_in_resort','no_of_guests_in_city_hotel'],
title='Total no. of guests per Month', template = 'plotly_dark')
fig = px.histogram(df, x="is_canceled", color="is_repeated_guest", barmode="group",template='plotly_dark')
fig.update_layout(
title="Canceled or not",
title_font=dict(size=20),
xaxis=dict(title="Cancellation Status"),
yaxis=dict(title="Count")
)
fig.show()
plt.figure(figsize=(10,5))
ax = sns.countplot(x='month',hue='is_canceled',data=df,palette='Blues')
plt.title('Reservation status per month')
plt.xlabel('month')
plt.ylabel('No. of reservations')
plt.xticks(rotation=45)
plt.legend(['not canceled','canceled'])
plt.show()
# Assuming the 'df' variable is a pandas DataFrame
canceled_data = df[df['is_canceled'] == 1]
top_10_country = canceled_data['country'].value_counts()[:10].reset_index()
top_10_country.columns = ['country','No. of guests']
fig = px.pie(top_10_country,names='country',values='No. of guests',title='Top 10 countries with reservation canceled',template='simple_white')
fig.update_traces(textposition='inside',textinfo='label+value+percent')
fig.show()
fig = px.choropleth(top_10_country, locations='country',color='No. of guests',
title='Top 10 Countries with Bookings Canceled',
color_continuous_scale='Viridis',
labels={'country': 'Country', 'No. of guests': 'cancellation_count'})
fig.show()
df['month'] = df['reservation_status_date'].dt.month
month_mapping = {
1: 'January',
2: 'February',
3: 'March',
4: 'April',
5: 'May',
6: 'June',
7: 'July',
8: 'August',
9: 'September',
10: 'October',
11: 'November',
12: 'December'
}
df['month'] = df['month'].map(month_mapping)
adr_per_month = pd.DataFrame(df[df['is_canceled']==1].groupby(['month'])[['adr']].sum().reset_index())
adr_per_month = sm.Sort_Dataframeby_Month(adr_per_month,'month')
adr_per_month
| month | adr | |
|---|---|---|
| 0 | January | 571261.17 |
| 1 | February | 447915.76 |
| 2 | March | 424985.36 |
| 3 | April | 425413.23 |
| 4 | May | 391438.08 |
| 5 | June | 359688.89 |
| 6 | July | 482430.62 |
| 7 | August | 325786.93 |
| 8 | September | 275528.33 |
| 9 | October | 331156.87 |
| 10 | November | 281367.90 |
| 11 | December | 319569.53 |
fig = px.bar(adr_per_month,'month','adr',barmode='group',template='plotly_dark')
fig.update_layout(
title="Average Daily Rate (ADR) per Month for Canceled Bookings",
xaxis = dict(title="month",tickangle=-30),
yaxis_title="Average Daily Rate (ADR) per Month"
)
fig.show()
market_segment_data = pd.DataFrame(df['market_segment'].value_counts().reset_index(name='count'))
market_segment_data.columns=['market_segment','Count']
market_segment_data
| market_segment | Count | |
|---|---|---|
| 0 | Online TA | 56477 |
| 1 | Offline TA/TO | 24218 |
| 2 | Groups | 19811 |
| 3 | Direct | 12606 |
| 4 | Corporate | 5295 |
| 5 | Complementary | 743 |
| 6 | Aviation | 237 |
| 7 | Undefined | 2 |
px.bar(market_segment_data,x='market_segment',y='Count',template='plotly_dark')
distribution_channel_data = pd.DataFrame(df['distribution_channel'].value_counts().reset_index(name='count'))
distribution_channel_data.columns=['distribution_channel','Count']
distribution_channel_data
| distribution_channel | Count | |
|---|---|---|
| 0 | TA/TO | 97869 |
| 1 | Direct | 14645 |
| 2 | Corporate | 6677 |
| 3 | GDS | 193 |
| 4 | Undefined | 5 |
px.bar(distribution_channel_data,x='distribution_channel',y='Count',template='plotly_dark')